Libraries
## Loading required package: lattice
## Loading required package: ggplot2
## Loading required package: xml2
##
## Attaching package: 'tensorflow'
## The following object is masked from 'package:caret':
##
## train
# _____ Hyper-parameters _____
# nObs: number of simulated observations. Must be even because exactly
# nObs/2 observations are assigned label 1 inside the loop below.
nObs <- 1000 # It must be even
# nSigma: number of noise levels to sweep over.
nSigma <- 10
# sigmas: evenly spaced noise standard deviations from 1e-4 (almost
# perfectly separable classes) up to 1 (heavily overlapping classes).
sigmas <- seq(from = 1e-4, to = 1, length.out = nSigma)
# NOTE: the dead assignment `indexSigma <- 10` that used to live here was
# removed — the `for (indexSigma in 1:nSigma)` header overwrites it
# immediately, so it had no effect on the script's behavior.
# _______________
# Main experiment: for each noise level sigma, simulate a 1-D binary
# classification problem, fit a primary keras classifier, hand-pick a
# decision threshold from the training ROC curve, fit a secondary
# ("meta") model on the primary model's outputs, and compare the two
# via precision / recall / F1 on train, validation and test splits.
#
# NOTE(review): `allResults` is indexed at the end of each iteration but
# is never initialized in this chunk — it must already exist (e.g.
# `allResults <- vector("list", nSigma)`) before this loop runs; verify
# against earlier chunks of the document.
# NOTE(review): relies on `getMetrics()`, a project helper not visible
# here; usage shows it returns a list with $precision, $recall, $f1Score.
for (indexSigma in 1:nSigma) {
sigma <- sigmas[indexSigma]
# --- Simulate the data (same seed every iteration, so only sigma varies) ---
set.seed(1998)
# Pick half of the observation indices to carry label 1; the rest stay 0.
labelPosition <- sample(1:nObs, nObs/2)
y <- rep(0, nObs)
y[labelPosition] <- 1
set.seed(1998)
# Single feature: the label itself plus Gaussian noise of sd = sigma.
# Larger sigma => more class overlap => harder problem.
x <- y + rnorm(length(y), mean = 0, sd = sigma)
dd <- data.frame("observation" = 1:nObs,
"Feature" = x,
"Label" = as.character(y))
# Scatter plot of the simulated feature, colored by true label.
print(ggplot(dd, aes(x = observation, y = Feature, col = Label)) +
geom_point() + xlab(element_blank()) +
ggtitle(paste("Features M1 - Sigma = ", sigma)))
# --- Train / validation / test split: 80% in-sample, then 80/20 of that ---
set.seed(20) # Do not use 1998 here! (keeps the split independent of the data-generating seed)
inSampleInd <- sample(length(x), ceiling(0.8*nObs))
# trnInd are POSITIONS within inSampleInd, not raw observation indices.
trnInd <- sample(length(inSampleInd), ceiling(0.8*length(inSampleInd)))
xinSample <- x[inSampleInd]
yinSample <- y[inSampleInd]
xTrn <- xinSample[trnInd]
yTrn <- yinSample[trnInd]
xVal <- xinSample[-trnInd]
yVal <- yinSample[-trnInd]
xTst <- x[-inSampleInd]
yTst <- y[-inSampleInd]
# --- Primary model: tiny MLP (1 input -> 20 leaky-ReLU units -> sigmoid) ---
set.seed(1998)
primaryModel <- keras_model_sequential() %>%
layer_flatten(input_shape = 1) %>%
layer_dense(units = 20) %>%
layer_activation_leaky_relu() %>%
layer_dense(units = 1, activation = "sigmoid")
# Input format:
# x <-> Matrix
# y <-> vector
primaryModel %>%
compile(
optimizer = "rmsprop",
loss = "binary_crossentropy",
metrics = c("Recall", "Precision")
)
history <- primaryModel %>% fit(
x = xTrn,
y = yTrn,
epochs = 50,
batch_size = 256,
validation_data = list(xVal, yVal)
# validation_split = 0.2
)
# Raw sigmoid probabilities for all three splits (thresholded later).
yHatTrn <- primaryModel %>% predict(xTrn)
yHatVal <- primaryModel %>% predict(xVal)
yHatTst <- primaryModel %>% predict(xTst)
# _____ Choosing a threshold _____
# ROC curve on the TRAINING predictions (ROCR::prediction/performance).
df <- data.frame("predictions" = yHatTrn,
"labels" = yTrn)
pred <- prediction(df$predictions, df$labels)
perf <- performance(pred, "tpr", "fpr")
par(pty = "s")
plot(
perf,
col = "grey40",
lwd = 3,
main = paste("ROC M1 (Train) - Sigma = ", sigma)
)
# Line with a: intercept, b: slope, lty = line type
abline(a = 0, b = 1, lty = "dotted")
# _______________
# Evaluate 11 candidate thresholds (0, 0.1, ..., 1), scanning from the
# HIGHEST threshold down, and record in indGood the first (i.e. largest)
# threshold whose training TPR exceeds 0.8.
thresholds <- seq(from = 0, to = 1, length.out = 11)
pointsROC <- matrix(rep(NA, 2*length(thresholds)),
ncol = 2, nrow = length(thresholds))
indGood <- NA
for (i in length(thresholds):1) {
th <- thresholds[i]
# Confusion-matrix counts at threshold th on the training split.
TP <- sum(yHatTrn > th & yTrn == 1)
FN <- sum(yHatTrn <= th & yTrn == 1)
FP <- sum(yHatTrn > th & yTrn == 0)
TN <- sum(yHatTrn <= th & yTrn == 0)
TPR <- TP/(TP + FN)
FPR <- FP/(FP + TN)
if (TPR > 0.8 & is.na(indGood)) indGood <- i
pointsROC[i, ] <- c(FPR, TPR)
# Overlay each candidate operating point on the ROC plot.
points(FPR, TPR, pch = 19, col = "blue")
}
# _____ Manual adjustment _____
# Adopt the selected threshold and mark its operating point in red.
# NOTE(review): if no threshold reaches TPR > 0.8, indGood stays NA and
# the indexing below would fail — presumably never happens here; verify.
th <- thresholds[indGood]
points(pointsROC[indGood, 1], pointsROC[indGood, 2], pch = 19, col = "red")
# Binarize all predictions (logical vectors; TRUE compares equal to 1).
yHatTrn <- yHatTrn > th
yHatVal <- yHatVal > th
yHatTst <- yHatTst > th
# prediction_for_table <- predict(rf_classifier,validation1[,-5])
# table(observed=validation1[,5],predicted=prediction_for_table)
# _______________
# _____ Metrics _____
# _____ Train _____
callMetrics <- getMetrics(actual = yTrn, predicted = yHatTrn)
precisionM1Trn <- callMetrics$precision
recallM1Trn <- callMetrics$recall
f1ScoreM1Trn <- callMetrics$f1Score
# _______________
# _____ Validation _____
callMetrics <- getMetrics(actual = yVal, predicted = yHatVal)
precisionM1Val <- callMetrics$precision
recallM1Val <- callMetrics$recall
f1ScoreM1Val <- callMetrics$f1Score
# _______________
# _____ Test _____
callMetrics <- getMetrics(actual = yTst, predicted = yHatTst)
precisionM1Tst <- callMetrics$precision
recallM1Tst <- callMetrics$recall
f1ScoreM1Tst <- callMetrics$f1Score
# _______________
# _____ Secondary Model _____
# Meta-model features: the primary model's (thresholded) prediction plus
# the original feature. Meta-model target: TRUE only where the primary
# prediction is a true positive (predicted 1 AND actually 1).
xM2Trn <- cbind(yHatTrn, xTrn)
yM2Trn <- (yHatTrn == 1) & (yTrn == 1)
xM2Val <- cbind(yHatVal, xVal)
yM2Val <- (yHatVal == 1) & (yVal == 1)
xM2Tst <- cbind(yHatTst, xTst)
yM2Tst <- (yHatTst == 1) & (yTst == 1)
set.seed(1998)
# Secondary MLP: 2 inputs -> 25 leaky-ReLU units -> sigmoid.
secondaryModel <- keras_model_sequential() %>%
layer_flatten(input_shape = ncol(xM2Trn)) %>%
layer_dense(units = 25) %>%
layer_activation_leaky_relu() %>%
layer_dense(units = 1, activation = "sigmoid")
# Input format:
# x <-> Matrix
# y <-> vector
secondaryModel %>% compile(
optimizer = "rmsprop",
loss = "binary_crossentropy",
metrics = c("Recall", "Precision")
)
history <- secondaryModel %>% fit(
x = xM2Trn,
y = yM2Trn,
epochs = 40,
batch_size = 256,
validation_data = list(xM2Val, yM2Val)
)
# NOTE(review): predict_classes() was removed from keras (TF >= 2.6);
# replace with `predict(...) > 0.5` if this stops working after upgrade.
yHatM2Trn <- secondaryModel %>% predict_classes(xM2Trn)
yHatM2Val <- secondaryModel %>% predict_classes(xM2Val)
yHatM2Tst <- secondaryModel %>% predict_classes(xM2Tst)
# Meta-model final prediction: positive only when BOTH models say positive
# (the secondary model acts as a veto on the primary model's positives).
yHatMMTrn <- (yHatTrn == 1) & (yHatM2Trn == 1)
yHatMMVal <- (yHatVal == 1) & (yHatM2Val == 1)
yHatMMTst <- (yHatTst == 1) & (yHatM2Tst == 1)
# _____ Train _____
callMetrics <- getMetrics(actual = yTrn, predicted = yHatMMTrn)
precisionMMTrn <- callMetrics$precision
recallMMTrn <- callMetrics$recall
f1ScoreMMTrn <- callMetrics$f1Score
# 3x2 comparison table: rows = metrics, cols = primary vs meta model.
resultsTrn <- matrix(c(precisionM1Trn, precisionMMTrn,
recallM1Trn, recallMMTrn,
f1ScoreM1Trn, f1ScoreMMTrn),
nrow = 3, ncol = 2, byrow = TRUE)
rownames(resultsTrn) <- c("Precision", "Recall", "F1-Score")
colnames(resultsTrn) <- c("Primary Model", "Meta Model")
# _______________
# _____ Validation _____
callMetrics <- getMetrics(actual = yVal, predicted = yHatMMVal)
precisionMMVal <- callMetrics$precision
recallMMVal <- callMetrics$recall
f1ScoreMMVal <- callMetrics$f1Score
resultsVal <- matrix(c(precisionM1Val, precisionMMVal,
recallM1Val, recallMMVal,
f1ScoreM1Val, f1ScoreMMVal),
nrow = 3, ncol = 2, byrow = TRUE)
rownames(resultsVal) <- c("Precision", "Recall", "F1-Score")
colnames(resultsVal) <- c("Primary Model", "Meta Model")
# _______________
# _____ Test _____
callMetrics <- getMetrics(actual = yTst, predicted = yHatMMTst)
precisionMMTst <- callMetrics$precision
recallMMTst <- callMetrics$recall
f1ScoreMMTst <- callMetrics$f1Score
resultsTst <- matrix(c(precisionM1Tst, precisionMMTst,
recallM1Tst, recallMMTst,
f1ScoreM1Tst, f1ScoreMMTst),
nrow = 3, ncol = 2, byrow = TRUE)
rownames(resultsTst) <- c("Precision", "Recall", "F1-Score")
colnames(resultsTst) <- c("Primary Model", "Meta Model")
# _______________
# Keep only the TEST table per sigma; train/val tables are just printed.
allResults[[indexSigma]] <- resultsTst
print(resultsTrn)
print(resultsVal)
print(resultsTst)
}


## Primary Model Meta Model
## Precision 1 1
## Recall 1 1
## F1-Score 1 1
## Primary Model Meta Model
## Precision 1 1
## Recall 1 1
## F1-Score 1 1
## Primary Model Meta Model
## Precision 1 1
## Recall 1 1
## F1-Score 1 1


## Primary Model Meta Model
## Precision 1.0000000 1.0000000
## Recall 0.9969231 0.9969231
## F1-Score 0.9984592 0.9984592
## Primary Model Meta Model
## Precision 1.0000000 1.0000000
## Recall 0.9875000 0.9875000
## F1-Score 0.9937107 0.9937107
## Primary Model Meta Model
## Precision 1 1
## Recall 1 1
## F1-Score 1 1


## Primary Model Meta Model
## Precision 0.9966555 0.9966555
## Recall 0.9169231 0.9169231
## F1-Score 0.9551282 0.9551282
## Primary Model Meta Model
## Precision 1.0000000 1.0000000
## Recall 0.8625000 0.8625000
## F1-Score 0.9261745 0.9261745
## Primary Model Meta Model
## Precision 1.0000000 1.0000000
## Recall 0.8842105 0.8842105
## F1-Score 0.9385475 0.9385475


## Primary Model Meta Model
## Precision 0.9409938 0.9409938
## Recall 0.9323077 0.9323077
## F1-Score 0.9366306 0.9366306
## Primary Model Meta Model
## Precision 0.9012346 0.9012346
## Recall 0.9125000 0.9125000
## F1-Score 0.9068323 0.9068323
## Primary Model Meta Model
## Precision 0.9456522 0.9456522
## Recall 0.9157895 0.9157895
## F1-Score 0.9304813 0.9304813


## Primary Model Meta Model
## Precision 0.8597015 0.8597015
## Recall 0.8861538 0.8861538
## F1-Score 0.8727273 0.8727273
## Primary Model Meta Model
## Precision 0.8192771 0.8192771
## Recall 0.8500000 0.8500000
## F1-Score 0.8343558 0.8343558
## Primary Model Meta Model
## Precision 0.9111111 0.9111111
## Recall 0.8631579 0.8631579
## F1-Score 0.8864865 0.8864865


## Primary Model Meta Model
## Precision 0.7500000 0.7500000
## Recall 0.9230769 0.9230769
## F1-Score 0.8275862 0.8275862
## Primary Model Meta Model
## Precision 0.7000000 0.7000000
## Recall 0.8750000 0.8750000
## F1-Score 0.7777778 0.7777778
## Primary Model Meta Model
## Precision 0.7350427 0.7350427
## Recall 0.9052632 0.9052632
## F1-Score 0.8113208 0.8113208


## Primary Model Meta Model
## Precision 0.7263682 0.7263682
## Recall 0.8984615 0.8984615
## F1-Score 0.8033012 0.8033012
## Primary Model Meta Model
## Precision 0.6764706 0.6764706
## Recall 0.8625000 0.8625000
## F1-Score 0.7582418 0.7582418
## Primary Model Meta Model
## Precision 0.6747967 0.6747967
## Recall 0.8736842 0.8736842
## F1-Score 0.7614679 0.7614679


## Primary Model Meta Model
## Precision 0.7020202 0.7005076
## Recall 0.8553846 0.8492308
## F1-Score 0.7711512 0.7677330
## Primary Model Meta Model
## Precision 0.6666667 0.6666667
## Recall 0.8500000 0.8500000
## F1-Score 0.7472527 0.7472527
## Primary Model Meta Model
## Precision 0.6585366 0.6585366
## Recall 0.8526316 0.8526316
## F1-Score 0.7431193 0.7431193


## Primary Model Meta Model
## Precision 0.6620690 0.6966292
## Recall 0.8861538 0.7630769
## F1-Score 0.7578947 0.7283407
## Primary Model Meta Model
## Precision 0.6071429 0.6597938
## Recall 0.8500000 0.8000000
## F1-Score 0.7083333 0.7231638
## Primary Model Meta Model
## Precision 0.5886525 0.6752137
## Recall 0.8736842 0.8315789
## F1-Score 0.7033898 0.7452830


## Primary Model Meta Model
## Precision 0.6343612 0.6871345
## Recall 0.8861538 0.7230769
## F1-Score 0.7394095 0.7046477
## Primary Model Meta Model
## Precision 0.5964912 0.6451613
## Recall 0.8500000 0.7500000
## F1-Score 0.7010309 0.6936416
## Primary Model Meta Model
## Precision 0.5845070 0.6725664
## Recall 0.8736842 0.8000000
## F1-Score 0.7004219 0.7307692